import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm

# Zero-based position of the CSV column to analyse.
series_index = 3

# load data & select column
data = pd.read_csv("../data/econ/table2.csv", encoding='gbk')

columns = data.columns
column_name = columns[series_index]
# Selecting with a list keeps a one-column DataFrame, so `.values` is a
# 2-D array of shape (n_rows, 1) rather than a flat vector.
original_series = data[[column_name]].values
series = original_series

# EDA: line plot of the raw values, titled with the column name
_, raw_axes = plt.subplots()
raw_axes.plot(series)
raw_axes.set_title(column_name)
plt.show()

# EDA: autocorrelation plot of the same values (drawn on its own figure)
plot_acf(series)
plt.show()


# data preprocessing -- null value filling


# data preprocessing -- abnormal value handling


# stationarity -- ADF test & differencing
def adf_report(series, column_name):
    """Run ``adfuller`` on ``series`` and print its statistic and p-value.

    Args:
        series: Observations handed unchanged to ``adfuller``.
        column_name: String label inserted into both printed lines.

    Returns:
        The object returned by ``adfuller``; element 0 is printed as the
        ADF statistic and element 1 as the p-value.
    """
    outcome = adfuller(series)
    # (line prefix, index into the adfuller result) for each printed line.
    report_lines = (
        ('The ADF Statistic of ', 0),
        ('The p value of ', 1),
    )
    for prefix, position in report_lines:
        print(prefix + column_name + ': %f' % outcome[position])
    return outcome


# Initial ADF test on the undifferenced series; element 1 of the result is
# the p-value.
adf_p = adf_report(series, column_name)[1]

# Significance level for the ADF test and the cap on differencing rounds.
# Kept as single named values so the loop condition and the final
# stationarity flag cannot drift apart.
significance_level = 0.05
max_diff_order = 2

diff_order = 0

# Keep differencing while the ADF test fails to reject the unit-root null
# (p above the level) and the differencing cap has not been reached.
while adf_p > significance_level and diff_order < max_diff_order:
    # `series` is a NumPy array (it comes from `.values`), which has no
    # `.diff()` method. Flatten first: on an (n, 1) array np.diff would
    # difference along the length-1 axis and return an empty result.
    # Unlike pandas' diff, np.diff leaves no leading NaN, so the output can
    # go straight into adfuller.
    series = np.diff(np.ravel(series))
    adf_p = adfuller(series)[1]
    diff_order += 1

# A small p-value rejects the unit-root null, i.e. the series is stationary.
stationary = adf_p <= significance_level

# stationary -- seasonal decomposition (classical seasonal_decompose, not STL),
# used as a fallback when differencing did not yield a stationary series
if not stationary:
    # NOTE(review): period=7 presumably assumes weekly seasonality in daily
    # observations -- confirm against the data's sampling frequency.
    rd = sm.tsa.seasonal_decompose(series, period=7)
    rd.plot()
    plt.show()

    seasonal_factor = rd.seasonal.tolist()
    # The moving-average trend (and hence the residual) is NaN at both ends
    # unless extrapolate_trend is set; nan_to_num replaces those with 0.
    # NOTE(review): zero padding can distort the ADF test on the trend;
    # consider dropping the NaN edges instead.
    trend_factor = np.nan_to_num(rd.trend)
    residual_factor = np.nan_to_num(rd.resid)

    adf_report(seasonal_factor, "season")
    adf_report(trend_factor, "trend")

# order selection


# model fitting


# model comparison & selection
